R code for lecture 3

#analysis from last time
tadpoles <- read.table("ecol 563/tadpoles.csv", header=T, sep=',')
tadpoles$fac3 <- factor(tadpoles$fac3)
out1 <- lm(response~fac1 + fac2 + fac3 + fac1:fac2 + fac1:fac3 + fac2:fac3 + fac1:fac2:fac3, data=tadpoles)
anova(out1)
library(car)
Anova(out1)
 
# list levels of a factor
levels(tadpoles$fac1)
 
#create an experiment where the treatments are balanced
expand.grid(fac1=levels(tadpoles$fac1), fac2=levels(tadpoles$fac2), fac3=levels(tadpoles$fac3))
junk <- expand.grid(fac1=levels(tadpoles$fac1), fac2=levels(tadpoles$fac2), fac3=levels(tadpoles$fac3))
junk2 <- data.frame(fac1=rep(junk$fac1,10), fac2=rep(junk$fac2,10), fac3=rep(junk$fac3,10), y=rnorm(120))
junk2[1:8,]
 
#fit model to balanced data
mj <- lm(y~fac1*fac2*fac3, data=junk2)
 
#Type I and Type II tests are the same
anova(mj)
Anova(mj)
 
#data are unbalanced even with missing response values
table(tadpoles$treatment)
 
#get treatment counts after removing observations with missing response values
table(tadpoles$treatment[!is.na(tadpoles$response)])
table(tadpoles$fac1[!is.na(tadpoles$response)])
table(tadpoles$fac2[!is.na(tadpoles$response)])
table(tadpoles$fac3[!is.na(tadpoles$response)])
 
# we dropped a 3-factor interaction and a 2-factor interaction
Anova(out1)
out2 <- lm(response~fac1 + fac2 + fac3 + fac1:fac2  + fac2:fac3, data=tadpoles)
anova(out2)
Anova(out2)
 
#variance heterogeneity
boxplot(response~treatment, data=tadpoles, horizontal=T, xlab='Mitotic activity', las=1) 
treat.mean <- tapply(tadpoles$response, tadpoles$treatment, mean, na.rm=T)
treat.mean
points(treat.mean, 1:12, col=2, pch=8)
 
#calculate variances for each treatment
treat.var <- tapply(tadpoles$response, tadpoles$treatment, var, na.rm=T)
treat.var
 
#display variances in dot plot
library(lattice)
dotplot(names(treat.var)~treat.var, xlab='Variance')
 
#residuals look heavy-tailed relative to a normal dist
qqPlot(residuals(out2))
 
#tests for variance homogeneity
bartlett.test(tadpoles$response~tadpoles$treatment)
fligner.test(tadpoles$response~tadpoles$treatment)
leveneTest(response~treatment, data=tadpoles)
leveneTest(response~treatment, data=tadpoles, center=mean)
 
#use weighted least squares
library(nlme)
out.g <- gls(response~fac1+fac2+fac3+fac1:fac2+fac2:fac3, data=tadpoles, na.action=na.omit, method='ML')
summary(out.g)
anova(out.g)
 
#let each treatment have its own variance
out.g1 <- gls(response~fac1+fac2+fac3+fac1:fac2+fac2:fac3, data=tadpoles, na.action=na.omit, method='ML', weights=varIdent(form=~1|treatment))
anova(out.g1)
 
#Anova doesn't work on gls object 
Anova(out.g1)
 
#anova has a special method for gls objects: type argument
anova(out.g1, type='sequential')
anova(out.g1, type='marginal')
summary(out.g1)
 
#estimates have not changed much
coef(out.g1)
coef(out.g)
 
#compare residuals
qqPlot(residuals(out.g))
qqPlot(residuals(out.g1, type='normalized')
 
#simulate from final model
simulate(out2,n=1)
 
#six simulated data sets
junk <- simulate(out2,n=6)
 
#generates a data frame
class(junk)
dim(junk)
 
#observations with missing values for response area excluded
dim(tadpoles)
length(tadpoles$response[!is.na(tadpoles$response)])
 
#using the rep function
rep(1:3,4)
rep(1:3,c(4,4,4))
 
#repeat the number of simulated values
rep(nrow(junk),6)
my.dat1 <- data.frame(sims=unlist(junk),simnum = rep(1:6,rep(nrow(junk),6)))
my.dat1[1:8,]
 
# display kernel density estimates of distribution of simulated data
densityplot(~sims|simnum, data=my.dat1)
densityplot(~sims|factor(simnum), data=my.dat1, plot.points=F)
 
# add kernel density of the raw data
densityplot(~sims|factor(simnum), data=my.dat1, panel=function(x) {
panel.densityplot(x, plot.points=F)
panel.densityplot(tadpoles$response[!is.na(tadpoles$response)], plot.points=F, col=2)
})
 
# remove missing response observations from treatment and response variable
!is.na(tadpoles$response)
short.treatment <- tadpoles$treatment[!is.na(tadpoles$response)]
short.response <- tadpoles$response[!is.na(tadpoles$response)]
 
#calculate maximum response in CoD1 treatment
short.response[short.treatment=='CoD1']
CoD1.max <- max(short.response[short.treatment=='CoD1'])
CoD1.max
 
#obtain 1000 simulated data sets
junk<-simulate(out2,n=1000)
colnames(junk)[1:10]
 
#we can access the elements using $, [,], or [[]] notation
junk$sim_1
junk[,1]
junk[[1]]
 
#maximum response for coD1 treatment in simulation 1
max(junk[[1]][short.treatment=='CoD1'])
 
#obtain maximum for CoD1 treatment in each simulation
sim.max <- sapply(1:1000, function(x) max(junk[[x]][short.treatment=='CoD1']))
 
#the maximum simulated maxium is less than the actual maxiumum
max(sim.max)
CoD1.max
 
#graph the density
plot(density(sim.max))
#expand x-limits to make room for actual maximum
c(min(sim.max), max(c(sim.max,CoD1.max)))
plot(density(sim.max), xlim=c(min(sim.max), max(c(sim.max,CoD1.max))))
#add actual maximum
points(CoD1.max, 0, col=2, pch=8)
R Code for Lecture 3 (Wednesday, August 29, 2012)